#install.packages('tidycensus')
#install.packages('position_dodge')
#library(position_dodge)
library(scales)
library(tidycensus)
library(tidyverse)
library(data.table)
library(readxl)
#install.packages('maps')
library(maps)
library(stargazer)
#install.packages('R.utils')
library(ggrepel)
library(R.utils)
options(tigris_use_cache = TRUE)
https://rconsortium.github.io/censusguide/r-packages-all.html
https://walker-data.com/tidycensus/articles/spatial-data.html
The following code uses the tidycensus package and the Census API to download data + shapefiles from the US Census.
Declare Census product ( decennial).
Get values for total population in 2010 (just for an example), by state. Get shapefiles by geometry=True
Subset to US mainland; filter out Alaska, Puerto Rico, Hawaii. Convert state names to lowercase
`################
#Declare parameters - API key, variables, time
census_api_key("2f1473c692f61175605ea04cbe2a9a1b41d5bf7c")
## To install your API key for use in future sessions, run this function with `install = TRUE`.
yr_select <- c(2010)
#call DF
stpop <- get_decennial(geography = "state",
variables = "P003001",
geometry = TRUE,
year = yr_select) %>% select(-variable) %>%
filter(NAME!="Puerto Rico", NAME!="Alaska", NAME!="Hawaii" )
## Getting data from the 2010 decennial Census
## Using Census Summary File 1
stpop$NAME <- tolower(stpop$NAME)
###############################
redist <- read.csv("./source_data/analysis_data.csv", stringsAsFactors = FALSE)
election_map<-merge(stpop, redist, by.x = "NAME", by.y = "state_full" )
-could make another map graphic that only highlights states that changed from 2000 to 2010
#idvars = c("NAME", "GEOID", "geometry")
# Visualization
##Maps
map2012 <- election_map %>% filter(Year==2012)
## democratic votes
map2012 %>%
ggplot() +
geom_sf(aes(fill=Drawn.by), color="black") +
geom_sf_text(aes(label = State), family = "Times", size = 2.6,
color = "black", alpha = 0.9, hjust = 0.5, vjust = 0.5,
label.padding = unit(0, "pt")) +
labs(title = "Redistricting policies in 2012")+
theme(plot.title =element_text(hjust=0.5, face="bold"), #moves title to the middle of graph, bolds it
text= element_text(family= "Times", size = )) +
scale_fill_manual(values = c("#80b1d3", "#ffffb3", "#98FB98", "#fb8072", "#8dd3c7" , "#436EEE"))
The following plots show that the average level of turnout
# this code uses the dataframe we use later in the script also
redist_turn_short <- election_map %>%
select(State, NAME, Year, Drawn.by, changed, Seats, turnout_perc,
legislature, partisan, independent_commission, court, mid_term) %>%
mutate(turnout_perc = turnout_perc / 100) %>%
filter(Seats != 1,
Year < 2022)
# Dotplot or pointplot
redist_turn_short %>%
ggplot(aes(x = Drawn.by, y = turnout_perc)) +
geom_point(aes(color=turnout_perc)) +
labs(title = "Turnout in Congressional elections, 2000-2020",
x = "Redistricting institution",
y = "Turnout Percentage") +
theme(plot.title = element_text(hjust = 0.5, face = "bold"),
axis.title = element_text(face = "bold"))
# Boxplot
turnout_by_drawnby <- redist_turn_short %>%
group_by(Drawn.by) %>%
summarise(turnout_perc = mean(turnout_perc))
redist_turn_short %>%
ggplot(aes(x = Drawn.by, y = turnout_perc)) +
geom_boxplot() +
geom_text(data = turnout_by_drawnby,
aes(x = Drawn.by, y = turnout_perc, label = round(turnout_perc, 2)),
color = "red",
size = 3,
hjust = 0.5,
vjust = 0.5,
# position = position_vdodge(height=0.5)
) +
labs(title = "Turnout Percentage by Drawn By",
x = "Drawn By",
y = "Turnout Percentage") +
theme(plot.title = element_text(hjust = 0.5, face = "bold"),
axis.title = element_text(face = "bold"))
# test for statistical differences in turnout percentage, across the levels of Drawn.by
# p-value =0.059, on the edge of 5% significance
aov_results <- aov(turnout_perc ~ Drawn.by, data = redist_turn_short)
# Print the summary of the ANOVA results
summary(aov_results)
## Df Sum Sq Mean Sq F value Pr(>F)
## Drawn.by 5 0.0302 0.006042 2.144 0.0595 .
## Residuals 414 1.1669 0.002819
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
### By state
election_map %>%
ggplot(aes(x=Year, y = turnout_perc, colour = State)) +
geom_line(size = 0.5) +
labs(title = "Dem and Rep voter turnout from 2000-2020", x = "year", y = "percentage of vote by party") +
theme(plot.title = element_text(hjust = 0.5))
# Aggregate to institution
inst_lineplt <- election_map %>% group_by(Institution, Year) %>% summarise(avg_turnout = mean(turnout_perc))
inst_lineplt %>%
ggplot(aes(x=Year, y = avg_turnout, colour = Institution)) +
geom_line(size = .7) +
labs(title = "Avg. Turnout percentage 2000-2020, by redistricing ", x = "year", y = "Turnout percentage") +
theme(plot.title = element_text(hjust = 0.5))
# Aggregate - midterm vs prez
midtm <- election_map %>% filter(mid_term==1) %>%
group_by(Institution, Year) %>%
summarise(avg_turnout = mean(turnout_perc))
prez <- election_map %>% filter(mid_term==0) %>%
group_by(Institution, Year) %>%
summarise(avg_turnout = mean(turnout_perc))
midtm %>%
ggplot(aes(x=Year, y = avg_turnout, colour = Institution)) +
geom_line(size = .7) +
labs(title = "Avg. Turnout - Midterms ", x = "year", y = "Turnout percentage") +
theme(plot.title = element_text(hjust = 0.5))
prez %>%
ggplot(aes(x=Year, y = avg_turnout, colour = Institution)) +
geom_line(size = .7) +
labs(title = "Avg. Turnout -Pres. election, by redistricing ", x = "year", y = "Turnout percentage") +
theme(plot.title = element_text(hjust = 0.5))
Our main independent variable derives from the ‘Institution’ or ‘Drawn.By’ columns. Several options:
Categorical/factor variable
Get dummies for each category (court, legislature, independent etc)
Make custom variables ‘partisan’=1 or 0, and categorize the above cases into it.
# not sure if it's appropriate to use state as a control
turnout_regression <- lm(turnout_perc ~ Institution+mid_term,
data = election_map)
summary(turnout_regression)
##
## Call:
## lm(formula = turnout_perc ~ Institution + mid_term, data = election_map)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.5872 -3.0511 -0.1245 2.9098 15.2116
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 75.5028 0.7600 99.346 < 2e-16 ***
## InstitutionBackup commission -5.1083 1.2742 -4.009 7.08e-05 ***
## InstitutionIndependent commission -5.3257 1.0768 -4.946 1.06e-06 ***
## InstitutionLegislature -3.2784 0.7812 -4.196 3.24e-05 ***
## Institutionn/a -1.4144 0.9498 -1.489 0.1371
## InstitutionPolitician commission -3.2006 1.6449 -1.946 0.0523 .
## mid_term -5.0012 0.4249 -11.770 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.652 on 473 degrees of freedom
## Multiple R-squared: 0.2744, Adjusted R-squared: 0.2652
## F-statistic: 29.81 on 6 and 473 DF, p-value: < 2.2e-16
redist_turn_short <- election_map %>%
select(State, NAME, Year, Drawn.by, changed, Seats, turnout_perc,
legislature, partisan, independent_commission, court, mid_term) %>%
mutate(turnout_perc = turnout_perc / 100) %>%
filter(Seats != 1,
Year < 2022)
redist_turn_short %>%
ggplot(aes(x = Drawn.by, y = turnout_perc)) +
geom_point()
changed_subset <- redist_turn_short %>%
filter(changed == 1)
legislature_regression <- lm(turnout_perc ~ legislature + mid_term,
data = changed_subset)
court_regression <- lm(turnout_perc ~ court + mid_term,
data = changed_subset)
stargazer(legislature_regression, court_regression, type = "text")
##
## ===========================================================
## Dependent variable:
## ----------------------------
## turnout_perc
## (1) (2)
## -----------------------------------------------------------
## legislature -0.008
## (0.008)
##
## court 0.017**
## (0.008)
##
## mid_term -0.051*** -0.051***
## (0.008) (0.008)
##
## Constant 0.714*** 0.703***
## (0.007) (0.006)
##
## -----------------------------------------------------------
## Observations 140 140
## R2 0.246 0.267
## Adjusted R2 0.235 0.257
## Residual Std. Error (df = 137) 0.046 0.045
## F Statistic (df = 2; 137) 22.341*** 24.987***
## ===========================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
cong_dist_boundaries <- get_decennial(geography = “cd”, variables = “P003001”, geometry = TRUE, year = 2019)
margin_regression <- lm(dem_margin ~ partisan+mid_term + turnout_perc , data = votes) summary(margin_regression)